#!/bin/bash
# Semantic Router KServe Deployment Helper Script
# This script simplifies deploying the semantic router to work with OpenShift AI KServe InferenceServices
# It handles variable substitution, validation, and deployment

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Default values
NAMESPACE=""
INFERENCESERVICE_NAME=""
MODEL_NAME=""
STORAGE_CLASS=""
MODELS_PVC_SIZE="10Gi"
CACHE_PVC_SIZE="5Gi"
# Embedding model for semantic caching and tools similarity
# Common options from sentence-transformers:
#   - all-MiniLM-L12-v2 (default, balanced speed/quality)
#   - all-mpnet-base-v2 (higher quality, slower)
#   - all-MiniLM-L6-v2 (faster, lower quality)
#   - paraphrase-multilingual-MiniLM-L12-v2 (multilingual)
EMBEDDING_MODEL="all-MiniLM-L12-v2"
DRY_RUN=false
SKIP_VALIDATION=false

# Usage function
usage() {
    cat << EOF
Usage: $0 [OPTIONS]

Deploy vLLM Semantic Router for OpenShift AI KServe InferenceServices

Required Options:
  -n, --namespace NAMESPACE          OpenShift namespace to deploy to
  -i, --inferenceservice NAME        Name of the KServe InferenceService
  -m, --model MODEL_NAME             Model name as reported by the InferenceService

Optional:
  -s, --storage-class CLASS          StorageClass for PVCs (default: cluster default)
  --models-pvc-size SIZE             Size for models PVC (default: 10Gi)
  --cache-pvc-size SIZE              Size for cache PVC (default: 5Gi)
  --embedding-model MODEL            BERT embedding model (default: all-MiniLM-L12-v2)
  --dry-run                          Generate manifests without applying
  --skip-validation                  Skip pre-deployment validation
  -h, --help                         Show this help message

Examples:
  # Deploy to namespace 'semantic' with granite32-8b model
  $0 -n semantic -i granite32-8b -m granite32-8b

  # Deploy with custom storage class and embedding model
  $0 -n myproject -i llama3-70b -m llama3-70b -s gp3-csi --embedding-model all-mpnet-base-v2

  # Dry run to see what will be deployed
  $0 -n semantic -i granite32-8b -m granite32-8b --dry-run

Prerequisites:
  - OpenShift CLI (oc) installed and logged in
  - OpenShift AI (RHOAI) with KServe installed
  - InferenceService already deployed
  - Cluster admin or namespace admin permissions

For more information, see README.md
EOF
    exit 1
}

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        -n|--namespace)
            NAMESPACE="$2"
            shift 2
            ;;
        -i|--inferenceservice)
            INFERENCESERVICE_NAME="$2"
            shift 2
            ;;
        -m|--model)
            MODEL_NAME="$2"
            shift 2
            ;;
        -s|--storage-class)
            STORAGE_CLASS="$2"
            shift 2
            ;;
        --models-pvc-size)
            MODELS_PVC_SIZE="$2"
            shift 2
            ;;
        --cache-pvc-size)
            CACHE_PVC_SIZE="$2"
            shift 2
            ;;
        --embedding-model)
            EMBEDDING_MODEL="$2"
            shift 2
            ;;
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --skip-validation)
            SKIP_VALIDATION=true
            shift
            ;;
        -h|--help)
            usage
            ;;
        *)
            echo -e "${RED}Unknown option: $1${NC}"
            usage
            ;;
    esac
done

# Validate required arguments
if [ -z "$NAMESPACE" ] || [ -z "$INFERENCESERVICE_NAME" ] || [ -z "$MODEL_NAME" ]; then
    echo -e "${RED}Error: Missing required arguments${NC}"
    usage
fi

# Banner
echo ""
echo "=================================================="
echo "  vLLM Semantic Router - KServe Deployment"
echo "=================================================="
echo ""

# Display configuration
echo -e "${BLUE}Configuration:${NC}"
echo "  Namespace:              $NAMESPACE"
echo "  InferenceService:       $INFERENCESERVICE_NAME"
echo "  Model Name:             $MODEL_NAME"
echo "  Embedding Model:        $EMBEDDING_MODEL"
echo "  Storage Class:          ${STORAGE_CLASS:-<cluster default>}"
echo "  Models PVC Size:        $MODELS_PVC_SIZE"
echo "  Cache PVC Size:         $CACHE_PVC_SIZE"
echo "  Dry Run:                $DRY_RUN"
echo ""

# Pre-deployment validation
if [ "$SKIP_VALIDATION" = false ]; then
    echo -e "${BLUE}Step 1: Validating prerequisites...${NC}"

    # Check oc command
    if ! command -v oc &> /dev/null; then
        echo -e "${RED}✗ Error: 'oc' command not found. Please install OpenShift CLI.${NC}"
        exit 1
    fi
    echo -e "${GREEN}✓${NC} OpenShift CLI found"

    # Check if logged in
    if ! oc whoami &> /dev/null; then
        echo -e "${RED}✗ Error: Not logged in to OpenShift. Run 'oc login' first.${NC}"
        exit 1
    fi
    echo -e "${GREEN}✓${NC} Logged in as $(oc whoami)"

    # Check if namespace exists
    if ! oc get namespace "$NAMESPACE" &> /dev/null; then
        echo -e "${YELLOW}⚠ Warning: Namespace '$NAMESPACE' does not exist.${NC}"
        read -p "Create namespace? (y/n) " -n 1 -r
        echo
        if [[ $REPLY =~ ^[Yy]$ ]]; then
            oc create namespace "$NAMESPACE"
            echo -e "${GREEN}✓${NC} Created namespace: $NAMESPACE"
        else
            echo -e "${RED}✗ Aborted${NC}"
            exit 1
        fi
    else
        echo -e "${GREEN}✓${NC} Namespace exists: $NAMESPACE"
    fi

    # Check if InferenceService exists
    if ! oc get inferenceservice "$INFERENCESERVICE_NAME" -n "$NAMESPACE" &> /dev/null; then
        echo -e "${RED}✗ Error: InferenceService '$INFERENCESERVICE_NAME' not found in namespace '$NAMESPACE'${NC}"
        echo "  Please deploy your InferenceService first."
        exit 1
    fi
    echo -e "${GREEN}✓${NC} InferenceService exists: $INFERENCESERVICE_NAME"

    # Check if InferenceService is ready
    ISVC_READY=$(oc get inferenceservice "$INFERENCESERVICE_NAME" -n "$NAMESPACE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
    if [ "$ISVC_READY" != "True" ]; then
        echo -e "${YELLOW}⚠ Warning: InferenceService '$INFERENCESERVICE_NAME' is not ready yet${NC}"
        echo "  Status: $(oc get inferenceservice "$INFERENCESERVICE_NAME" -n "$NAMESPACE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].message}')"
        read -p "Continue anyway? (y/n) " -n 1 -r
        echo
        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
            exit 1
        fi
    else
        echo -e "${GREEN}✓${NC} InferenceService is ready"
    fi

    # Get predictor service URL
    PREDICTOR_URL=$(oc get inferenceservice "$INFERENCESERVICE_NAME" -n "$NAMESPACE" -o jsonpath='{.status.components.predictor.address.url}' 2>/dev/null || echo "")
    if [ -n "$PREDICTOR_URL" ]; then
        echo -e "${GREEN}✓${NC} Predictor URL: $PREDICTOR_URL"
    fi

    # Create stable ClusterIP service for predictor (KServe creates headless service by default)
    echo "Creating stable ClusterIP service for predictor..."

    # Use template file for stable service
    if [ -f "$SCRIPT_DIR/service-predictor-stable.yaml" ]; then
        substitute_vars "$SCRIPT_DIR/service-predictor-stable.yaml" "$TEMP_DIR/service-predictor-stable.yaml.tmp"
        oc apply -f "$TEMP_DIR/service-predictor-stable.yaml.tmp" -n "$NAMESPACE" > /dev/null 2>&1
    else
        # Fallback to inline creation if template not found
        cat <<EOF | oc apply -f - -n "$NAMESPACE" > /dev/null 2>&1
apiVersion: v1
kind: Service
metadata:
  name: ${INFERENCESERVICE_NAME}-predictor-stable
  labels:
    app: ${INFERENCESERVICE_NAME}
    component: predictor-stable
    managed-by: semantic-router-deploy
  annotations:
    description: "Stable ClusterIP service for semantic router (KServe headless service doesn't provide stable IP)"
spec:
  type: ClusterIP
  selector:
    serving.kserve.io/inferenceservice: ${INFERENCESERVICE_NAME}
  ports:
  - name: http
    port: 8080
    targetPort: 8080
    protocol: TCP
EOF
    fi

    # Get the stable ClusterIP
    PREDICTOR_SERVICE_IP=$(oc get svc "${INFERENCESERVICE_NAME}-predictor-stable" -n "$NAMESPACE" -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
    if [ -z "$PREDICTOR_SERVICE_IP" ]; then
        echo -e "${RED}✗ Error: Could not get predictor service ClusterIP${NC}"
        echo "  The stable service was not created properly."
        exit 1
    fi
    echo -e "${GREEN}✓${NC} Predictor service ClusterIP: $PREDICTOR_SERVICE_IP (stable across pod restarts)"

    echo ""
fi

# Generate manifests
echo -e "${BLUE}Step 2: Generating manifests...${NC}"

TEMP_DIR=$(mktemp -d)
trap 'rm -rf "$TEMP_DIR"' EXIT

# Function to substitute variables in a file
substitute_vars() {
    local input_file="$1"
    local output_file="$2"

    sed -e "s/{{NAMESPACE}}/$NAMESPACE/g" \
        -e "s/{{INFERENCESERVICE_NAME}}/$INFERENCESERVICE_NAME/g" \
        -e "s/{{MODEL_NAME}}/$MODEL_NAME/g" \
        -e "s|{{EMBEDDING_MODEL}}|$EMBEDDING_MODEL|g" \
        -e "s/{{PREDICTOR_SERVICE_IP}}/${PREDICTOR_SERVICE_IP:-10.0.0.1}/g" \
        -e "s/{{MODELS_PVC_SIZE}}/$MODELS_PVC_SIZE/g" \
        -e "s/{{CACHE_PVC_SIZE}}/$CACHE_PVC_SIZE/g" \
        "$input_file" > "$output_file"

    # Handle storage class (optional)
    if [ -n "$STORAGE_CLASS" ]; then
        sed -i.bak "s/# storageClassName:.*/storageClassName: $STORAGE_CLASS/g" "$output_file"
        rm -f "${output_file}.bak"
    fi
}

# Process each manifest file
for file in serviceaccount.yaml pvc.yaml configmap-router-config.yaml configmap-envoy-config.yaml peerauthentication.yaml deployment.yaml service.yaml route.yaml; do
    if [ -f "$SCRIPT_DIR/$file" ]; then
        substitute_vars "$SCRIPT_DIR/$file" "$TEMP_DIR/$file"
        echo -e "${GREEN}✓${NC} Generated: $file"
    else
        echo -e "${YELLOW}⚠ Skipping missing file: $file${NC}"
    fi
done

echo ""

# Dry run - just show what would be deployed
if [ "$DRY_RUN" = true ]; then
    echo -e "${BLUE}Dry run mode - Generated manifests:${NC}"
    echo ""
    for file in "$TEMP_DIR"/*.yaml; do
        echo "--- $(basename "$file") ---"
        cat "$file"
        echo ""
    done

    echo -e "${YELLOW}Dry run complete. No resources were created.${NC}"
    echo "To deploy for real, run without --dry-run flag."
    exit 0
fi

# Deploy manifests
echo -e "${BLUE}Step 3: Deploying to OpenShift...${NC}"

oc apply -f "$TEMP_DIR/serviceaccount.yaml" -n "$NAMESPACE"
oc apply -f "$TEMP_DIR/pvc.yaml" -n "$NAMESPACE"
oc apply -f "$TEMP_DIR/configmap-router-config.yaml" -n "$NAMESPACE"
oc apply -f "$TEMP_DIR/configmap-envoy-config.yaml" -n "$NAMESPACE"
oc apply -f "$TEMP_DIR/peerauthentication.yaml" -n "$NAMESPACE"
oc apply -f "$TEMP_DIR/deployment.yaml" -n "$NAMESPACE"
oc apply -f "$TEMP_DIR/service.yaml" -n "$NAMESPACE"
oc apply -f "$TEMP_DIR/route.yaml" -n "$NAMESPACE"

echo -e "${GREEN}✓${NC} Resources deployed successfully"
echo ""

# Wait for deployment
echo -e "${BLUE}Step 4: Waiting for deployment to be ready...${NC}"
echo "This may take a few minutes while models are downloaded..."
echo ""

# Monitor pod status
for i in {1..60}; do
    POD_STATUS=$(oc get pods -l app=semantic-router -n "$NAMESPACE" -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "")
    POD_NAME=$(oc get pods -l app=semantic-router -n "$NAMESPACE" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")

    if [ "$POD_STATUS" = "Running" ]; then
        READY=$(oc get pods -l app=semantic-router -n "$NAMESPACE" -o jsonpath='{.items[0].status.containerStatuses[*].ready}' 2>/dev/null || echo "")
        if [[ "$READY" == *"true true"* ]]; then
            echo -e "${GREEN}✓${NC} Pod is ready: $POD_NAME"
            break
        fi
    fi

    # Show init container progress
    INIT_STATUS=$(oc get pods -l app=semantic-router -n "$NAMESPACE" -o jsonpath='{.items[0].status.initContainerStatuses[0].state.running}' 2>/dev/null || echo "")
    if [ -n "$INIT_STATUS" ]; then
        echo -ne "\r  Initializing... (downloading models - this takes 2-3 minutes)"
    else
        echo -ne "\r  Waiting for pod... ($i/60)"
    fi

    sleep 5
done

echo ""

# Check final status
if ! oc get pods -l app=semantic-router -n "$NAMESPACE" -o jsonpath='{.items[0].status.containerStatuses[*].ready}' 2>/dev/null | grep -q "true true"; then
    echo -e "${YELLOW}⚠ Warning: Pod may not be fully ready yet${NC}"
    echo "  Check status with: oc get pods -l app=semantic-router -n $NAMESPACE"
    echo "  View logs with: oc logs -l app=semantic-router -c semantic-router -n $NAMESPACE"
fi

echo ""

# Get route URL
ROUTE_URL=$(oc get route semantic-router-kserve -n "$NAMESPACE" -o jsonpath='{.spec.host}' 2>/dev/null || echo "")
if [ -n "$ROUTE_URL" ]; then
    echo -e "${GREEN}✓${NC} External URL: https://$ROUTE_URL"
else
    echo -e "${YELLOW}⚠ Could not determine route URL${NC}"
fi

echo ""
echo "=================================================="
echo "  Deployment Complete!"
echo "=================================================="
echo ""
echo "Next steps:"
echo ""
echo "1. Test the deployment:"
echo "   curl -k \"https://$ROUTE_URL/v1/models\""
echo ""
echo "2. Try a chat completion:"
echo "   curl -k \"https://$ROUTE_URL/v1/chat/completions\" \\"
echo "     -H 'Content-Type: application/json' \\"
echo "     -d '{\"model\": \"$MODEL_NAME\", \"messages\": [{\"role\": \"user\", \"content\": \"Hello!\"}]}'"
echo ""
echo "3. Run validation tests:"
echo "   NAMESPACE=$NAMESPACE MODEL_NAME=$MODEL_NAME $SCRIPT_DIR/test-semantic-routing.sh"
echo ""
echo "4. View logs:"
echo "   oc logs -l app=semantic-router -c semantic-router -n $NAMESPACE -f"
echo ""
echo "5. Monitor metrics:"
echo "   oc port-forward -n $NAMESPACE svc/semantic-router-kserve 9190:9190"
echo "   curl http://localhost:9190/metrics"
echo ""

# Offer to run tests
if [ "$SKIP_VALIDATION" = false ]; then
    echo ""
    read -p "Run validation tests now? (y/n) " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo ""
        export NAMESPACE MODEL_NAME
        bash "$SCRIPT_DIR/test-semantic-routing.sh" || true
    fi
fi

echo ""
echo "For more information, see: $SCRIPT_DIR/README.md"
echo ""
